#Load Packages
library(readr)
library(tidyverse)
library(lubridate)
# Set options for plotting
# A very large scipen penalty makes R prefer fixed over scientific notation.
options(scipen = 999999)

# Set working directory to replication folder (Refugee_Info_Replication)
# Open code file from "Refugee_Info_Replication/code/post_type_analysis.R"
# Obtain the full path of the current script in RStudio. The editor is only
# queried when RStudio is running, because the rstudioapi call fails
# otherwise.
script_path <- if (rstudioapi::isAvailable()) {
  rstudioapi::getActiveDocumentContext()$path
} else {
  ""
}

# An unsaved document reports an empty string rather than NULL, so the path
# has to be checked for content, not merely for existence.
if (isTRUE(nzchar(script_path))) {
  # The script lives in <replication folder>/code, so the replication folder
  # is the parent of the script's directory.
  parent_directory <- dirname(dirname(script_path))
  setwd(parent_directory)
} else {
  cat("Script path is not set. Ensure your script is saved and you are running RStudio.")
}

# Check Working Directory
getwd()
# Read in Posts, Comments & Metadata
posts <- read_csv("data/posts.csv")

# Histograms by post_type Type
# Each post counts once. The counter has to exist before the first summary:
# without a `count` column, `sum(count)` would resolve to the dplyr function
# count() and fail.
posts$count <- 1

# Number of posts for every observed combination of post type and topic
# flags. `remove` adds up the topic flags of a combination; it is derived
# after summarising so that each combination yields exactly one row.
topic_combinations <- posts %>%
  group_by(
    post_type, land_travel, sea_travel, smuggling, asylum, return, camp,
    employment, scholarship, housing, healthcare
  ) %>%
  summarise(total = sum(count), .groups = "drop") %>%
  mutate(
    remove = land_travel + sea_travel + smuggling + asylum + return + camp +
      employment + scholarship + housing + healthcare
  )

# Keep the combinations where the topic flags add up to one.
aggregate <- subset(topic_combinations, remove == 1)
View(aggregate)

# Total number of posts of each type, used as the denominator.
posts_per_type <- table(posts$post_type)
aggregate$denominator <- as.integer(posts_per_type[aggregate$post_type])
View(aggregate)

# Labels are assigned by position, one set of ten per post type.
# NOTE(review): this assumes three post types with all ten single-topic rows
# each, sorted by the grouping columns (healthcare first, land travel last)
# - confirm against the data.
topic_labels <- c(
  "healthcare", "housing", "education", "employment", "refugee camps",
  "return", "asylum", "smuggling", "sea travel", "land travel"
)
aggregate$label <- rep(topic_labels, times = 3)
aggregate$label <- factor(aggregate$label, levels = c("asylum", "refugee camps", "employment", "land travel", "sea travel",  "education", "return","healthcare", "smuggling", "housing" ))
View(aggregate)

# Finish with the unfiltered combination table in `aggregate`.
aggregate <- topic_combinations
View(aggregate)
#Load Packages
library(readr)
library(tidyverse)
library(lubridate)
# Set options for plotting
# A very large scipen penalty makes R prefer fixed over scientific notation.
options(scipen = 999999)

# Obtain the full path of the current script in RStudio. The editor is only
# queried when RStudio is running, because the rstudioapi call fails
# otherwise.
script_path <- if (rstudioapi::isAvailable()) {
  rstudioapi::getActiveDocumentContext()$path
} else {
  ""
}

# An unsaved document reports an empty string rather than NULL, so the path
# has to be checked for content, not merely for existence.
if (isTRUE(nzchar(script_path))) {
  # Two dirname() calls: script file -> its directory -> parent directory.
  parent_directory <- dirname(dirname(script_path))
  setwd(parent_directory)
} else {
  cat("Script path is not set. Ensure your script is saved and you are running RStudio.")
}

# Check Working Directory
getwd()
# Read in Monthly Tone Data
monthly_tone <- read_csv("data/monthly_tone.csv")
post_tone <- read_csv("data/post_tone.csv")

# Mean diff_ar per post type, using only rows that have no missing value in
# any column (na.omit() on the whole table is row-wise).
cos_agg <- post_tone %>%
  na.omit() %>%
  group_by(post_type) %>%
  summarise(mean = mean(diff_ar))
View(cos_agg)

# Mean diff_ar per post type, dropping only the missing diff_ar values.
# Without na.rm the mean of a group containing a missing value would be NA.
cos_agg2 <- post_tone %>%
  group_by(post_type) %>%
  summarise(mean = mean(diff_ar, na.rm = TRUE))
View(cos_agg2)
# Identify top quartile of encouraging and discouraging posts
summary(post_tone$diff_ar)
# NOTE(review): the cut-offs are hard-coded; presumably they are the 3rd and
# 1st quartile printed by summary() above - confirm when the data changes.
# subset() drops rows whose diff_ar is missing.
encouraging <- subset(post_tone, diff_ar >= .0974)
discouraging <- subset(post_tone, diff_ar <= -.0276)
View(discouraging)

# Calculate Mean Engagement for Encouraging vs. Discouraging posts
# Total and mean engagement of the encouraging posts, per post type.
aggregate <- encouraging %>%
  group_by(post_type) %>%
  summarise(total = sum(engagement_fb), mean = mean(engagement_fb))
View(aggregate)

# Both summaries carry exactly the same columns (post_type, mean) so that
# they can be stacked with rbind() below.
aggregate_encouraging <- encouraging %>%
  group_by(post_type) %>%
  summarise(mean = mean(engagement_fb))

# Same for discouraging
aggregate_discouraging <- discouraging %>%
  group_by(post_type) %>%
  summarise(mean = mean(engagement_fb))
View(aggregate_discouraging)

# Label
aggregate_encouraging$sentiment <- "encouraging"
aggregate_discouraging$sentiment <- 'discouraging'
ag_total <- rbind(aggregate_encouraging, aggregate_discouraging)
View(ag_total)
# Plot
# Dodged bars of mean engagement per post, by source and sentiment. The
# height comes from the `mean` column; `ag_total` has no `prop` column.
ggplot(ag_total, aes(fill = sentiment, y = mean, x = post_type)) +
  geom_bar(position = "dodge", stat = "identity", color = "black") +
  theme_minimal(base_size = 22) +
  labs(y = "Mean Engagement per Post \n", x = "Source") +
  scale_fill_grey() +
  theme(legend.title = element_blank())
# Flag, within each post type, the posts at or above the 75th percentile of
# diff_ar as most encouraging and those at or below the 25th percentile as
# most discouraging. Everything else (including missing diff_ar) gets NA.
post_tone_quart <- post_tone %>%
  group_by(post_type) %>%
  mutate(
    quart = case_when(
      diff_ar >= quantile(diff_ar, 0.75, na.rm = TRUE) ~ "most_encouraging",
      diff_ar <= quantile(diff_ar, 0.25, na.rm = TRUE) ~ "most_discouraging",
      # Typed NA keeps the result a character column.
      TRUE ~ NA_character_
    )
  ) %>%
  ungroup()
View(post_tone_quart)

# Mean engagement of the flagged posts per post type and quartile group.
test <- post_tone_quart %>%
  filter(!is.na(quart)) %>%
  group_by(post_type, quart) %>%
  summarize(mean_eng = mean(engagement_fb), .groups = "drop")
print(test)
View(test)
View(ag_total)
View(post_tone_quart)

# Pooled quantiles of diff_ar across all post types, for comparison with the
# per-type cut-offs used above.
quantile(post_tone$diff_ar, c(0.025, 0.25, 0.75), na.rm = TRUE)
# Histograms by post_type Type
# Each post counts once.
posts$count <- 1

# Number of posts for every observed combination of post type and topic
# flags, one row per combination.
aggregate <- posts %>%
  group_by(
    post_type, land_travel, sea_travel, smuggling, asylum, return, camp,
    employment, scholarship, housing, healthcare
  ) %>%
  summarise(total = sum(count), .groups = "drop")
View(aggregate)
# Per post type: the sum of each topic flag plus the number of posts of that
# type. Every *_total column is a plain sum; `denominator` is the group size
# from n(), which takes no arguments.
topic_totals <- posts %>%
  group_by(post_type) %>%
  summarize(
    denominator = n(),
    asylum_total = sum(asylum),
    employment_total = sum(employment),
    housing_total = sum(housing),
    healthcare_total = sum(healthcare),
    camp_total = sum(camp),
    return_total = sum(return),
    land_travel_total = sum(land_travel),
    sea_travel_total = sum(sea_travel),
    scholarship_total = sum(scholarship),
    smuggling_total = sum(smuggling)
  )
View(topic_totals)

# Cross-check of the denominators: posts per type.
table(posts$post_type)
# Share of posts flagged for each topic, per post type (one column per
# topic). Column names double as the axis labels of the figure.
topic_salience <- posts %>%
  group_by(post_type) %>%
  summarize(
    denominator = n(),
    asylum = sum(asylum) / denominator,
    employment = sum(employment) / denominator,
    housing = sum(housing) / denominator,
    healthcare = sum(healthcare) / denominator,
    `refugee camp` = sum(camp) / denominator,
    return = sum(return) / denominator,
    `land travel` = sum(land_travel) / denominator,
    `sea travel` = sum(sea_travel) / denominator,
    education = sum(scholarship) / denominator,
    smuggling = sum(smuggling) / denominator
  )

# Transposed copy of the wide table, kept for inspection only.
topic_salience_plot <- as.data.frame(t(topic_salience))
View(topic_salience_plot)

# Long format: one row per post type and topic (`name`, `value`).
topic_salience <- topic_salience %>%
  pivot_longer(cols = asylum:smuggling)

# Order the topics for the x axis. Every level has to match a topic name
# exactly; an unmatched name becomes NA and is dropped by na.omit() below.
topic_salience$label <- factor(topic_salience$name, levels = c("asylum", "refugee camp", "employment", "land travel", "sea travel",  "education", "return","healthcare", "smuggling", "housing" ))
topic_salience <- na.omit(topic_salience)
View(topic_salience)

ggplot(topic_salience, aes(fill = post_type, y = value, x = label)) +
  geom_bar(position = "dodge", stat = "identity", color = "black") +
  theme_minimal(base_size = 22) +
  labs(y = "Proportion of Posts", x = "Topic") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  scale_fill_grey() +
  theme(legend.title = element_blank())
ggsave("plots/Figure_4a.pdf", width = 11, height = 7)
# Subset Data to Official Posts Only
# The subset has to exist before a column can be added to it.
official <- subset(posts, post_type == "official")

# Histograms by Source Type
official$count <- 1

# Share of official posts flagged for each topic, per value of
# pre_2016_source, in long format (`name`, `value`).
topic_salience <- official %>%
  group_by(pre_2016_source) %>%
  summarize(
    denominator = n(),
    asylum = sum(asylum) / denominator,
    employment = sum(employment) / denominator,
    housing = sum(housing) / denominator,
    healthcare = sum(healthcare) / denominator,
    `refugee camp` = sum(camp) / denominator,
    return = sum(return) / denominator,
    `land travel` = sum(land_travel) / denominator,
    `sea travel` = sum(sea_travel) / denominator,
    education = sum(scholarship) / denominator,
    smuggling = sum(smuggling) / denominator
  ) %>%
  pivot_longer(cols = asylum:smuggling)

# Order the topics for the x axis. Every level has to match a topic name
# exactly; an unmatched name becomes NA and is dropped by na.omit() below.
topic_salience$label <- factor(topic_salience$name, levels = c("asylum", "refugee camp", "employment", "land travel", "sea travel",  "education", "return","healthcare", "smuggling", "housing" ))
topic_salience <- na.omit(topic_salience)
View(topic_salience)

# The plot reads from topic_salience, the table that holds pre_2016_source,
# value and label.
ggplot(topic_salience, aes(fill = pre_2016_source, y = value, x = label)) +
  geom_bar(position = "dodge", stat = "identity", color = "black") +
  theme_minimal(base_size = 22) +
  labs(y = "Proportion of Posts", x = "Topic") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  scale_fill_grey() +
  theme(legend.title = element_blank())
ggsave("plots/Figure_6.pdf", width = 11, height = 7)
# Tone extremes: posts whose diff_ar is at least .0974 are treated as
# encouraging, posts whose diff_ar is at most -.0276 as discouraging.
summary(post_tone$diff_ar)
encouraging <- post_tone[which(post_tone$diff_ar >= .0974), ]
discouraging <- post_tone[which(post_tone$diff_ar <= -.0276), ]

# Average engagement_fb per post type for one tone subset, tagged with the
# sentiment the subset stands for.
mean_engagement_by_type <- function(tone_posts, tone_label) {
  by_type <- summarise(
    group_by(tone_posts, post_type),
    mean = mean(engagement_fb)
  )
  by_type$sentiment <- tone_label
  by_type
}

# One summary per sentiment, then stacked into a single table.
aggregate_encouraging <- mean_engagement_by_type(encouraging, "encouraging")
aggregate_discouraging <- mean_engagement_by_type(discouraging, 'discouraging')
ag_total <- rbind(aggregate_encouraging, aggregate_discouraging)
View(ag_total)

# Figure 8: dodged bars of mean engagement per post, by source and sentiment.
engagement_plot <- ggplot(
  ag_total,
  aes(x = post_type, y = mean, fill = sentiment)
) +
  geom_bar(stat = "identity", position = "dodge", color = "black") +
  theme_minimal(base_size = 22) +
  scale_fill_grey() +
  labs(x = "Source", y = "Mean Engagement per Post \n") +
  theme(legend.title = element_blank())
engagement_plot
ggsave("plots/Figure_8.pdf", width = 11, height = 7)
# Flag, within each post type, the posts at or above the 75th percentile of
# diff_ar as most encouraging and those at or below the 25th percentile as
# most discouraging. Everything else (including missing diff_ar) gets NA.
post_tone_quart <- post_tone %>%
  group_by(post_type) %>%
  mutate(
    quart = case_when(
      diff_ar >= quantile(diff_ar, 0.75, na.rm = TRUE) ~ "most_encouraging",
      diff_ar <= quantile(diff_ar, 0.25, na.rm = TRUE) ~ "most_discouraging",
      # Typed NA keeps the result a character column.
      TRUE ~ NA_character_
    )
  ) %>%
  ungroup()
# Identify top quartile of encouraging and discouraging posts
summary(post_tone$diff_ar)
# subset() already drops rows whose diff_ar is missing, so no separate
# is.na() filter is needed.
# NOTE(review): the cut-offs are hard-coded; presumably they are the 3rd and
# 1st quartile printed by summary() above - confirm when the data changes.
encouraging <- subset(post_tone, diff_ar >= .0974)
discouraging <- subset(post_tone, diff_ar <= -.0276)

# Calculate Mean Engagement for Encouraging vs. Discouraging posts
aggregate_encouraging <- encouraging %>%
  group_by(post_type) %>%
  summarise(mean = mean(engagement_fb))

# Same for discouraging
aggregate_discouraging <- discouraging %>%
  group_by(post_type) %>%
  summarise(mean = mean(engagement_fb))

# Label
aggregate_encouraging$sentiment <- "encouraging"
aggregate_discouraging$sentiment <- 'discouraging'
ag_total <- rbind(aggregate_encouraging, aggregate_discouraging)
View(ag_total)
View(aggregate_encouraging)
View(aggregate_discouraging)

# Plot
# Dodged bars of mean engagement per post, by source and sentiment.
ggplot(ag_total, aes(fill = sentiment, y = mean, x = post_type)) +
  geom_bar(position = "dodge", stat = "identity", color = "black") +
  theme_minimal(base_size = 22) +
  labs(y = "Mean Engagement per Post \n", x = "Source") +
  scale_fill_grey() +
  theme(legend.title = element_blank())
# Chunk defaults for knitted output: show code, hide messages and warnings.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
library(tidyverse)

# The data folder has to be the working directory before the relative read
# below can find the file, so the directory is set first.
# NOTE(review): this is an absolute, machine-specific path - adjust it to the
# local copy of the replication folder.
setwd("~/Siegel Dropbox/Alexandra Siegel/Refugee_Info/Refugee_Info_Replication/data/")
posts <- read_csv("posts.csv")
